library(tidyverse)
library(janitor)
library(broom)
library(readxl)
library(jsonlite)
library(gprofiler2)
theme_set(theme_bw())
set.seed(666)gvc_agora_opentargets
Setup environment
Read and prep data
GVC genes (within 1Mb flanking regions of GVC loci) [OLD]
# Load the GVC 1 Mb comparison workbook and tidy it:
#   * clean_names() normalises the column names;
#   * separate() splits gene_id on its default separator (any run of
#     non-alphanumeric characters) into the bare ID and a version part;
#   * the version and the two pre-computed annotation columns are dropped.
gvc <- "GVC_1Mb_comparison_050224.xlsx" %>%
  read_xlsx() %>%
  clean_names() %>%
  separate(col = gene_id, into = c("gene_id", "version")) %>%
  select(!c(version, agora_nominated_list, opentarget_info))
gvc

# Reduce to one row per gene. Sorting by absolute_distance first means
# distinct(), which keeps the first row it sees for each gene_id, retains the
# row with the smallest distance. The result is then ordered by symbol.
gvc.genes <- gvc %>%
  arrange(absolute_distance) %>%
  distinct(gene_id, .keep_all = TRUE) %>%
  select(all_of(c("gene_id", "gene_symbol", "absolute_distance"))) %>%
  arrange(gene_symbol)
gvc.genesgvc.genes %>% distinct(gene_symbol) %>% nrow()[1] 1344
GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes
Remove genes in APOE and HLA loci and manually add APOE and HLA genes (based on Bellenguez2022):
# Genes that are added back by hand once the APOE and HLA loci are removed
# (Ensembl gene IDs; selection based on Bellenguez2022, per the note above).
gvc.genes.apoe_hla <- gvc.genes %>%
  filter(
    gene_id %in% c(
      "ENSG00000130203",
      "ENSG00000196735",
      "ENSG00000179344",
      "ENSG00000196126"
    )
  )
gvc.genes.apoe_hla

# Drop every row assigned to the "APOE / TOMM40" or "HLA" locus group.
# Rows whose grouped_loci_gvc is NA are dropped as well, because `!=`
# evaluates to NA for them and filter() keeps only TRUE.
gvc.minus_apoe_hla <- gvc %>%
  filter(
    grouped_loci_gvc != "APOE / TOMM40",
    grouped_loci_gvc != "HLA"
  )
gvc.minus_apoe_hla

# One row per remaining gene (closest-distance row kept), then append the
# hand-picked APOE/HLA genes and order by symbol.
gvc.genes.minus_apoe_hla <- gvc.minus_apoe_hla %>%
  arrange(absolute_distance) %>%
  distinct(gene_id, .keep_all = TRUE) %>%
  select(all_of(c("gene_id", "gene_symbol", "absolute_distance"))) %>%
  bind_rows(gvc.genes.apoe_hla) %>%
  arrange(gene_symbol)
gvc.genes.minus_apoe_hlaAgora genes
Alzheimer’s disease gene prioritization scores from Agora (see also related journal article):
# Agora overall gene scores: parse the JSON export (simplifying nested
# arrays to vectors / data frames) and convert the result to a tibble.
ago1 <- as_tibble(
  read_json(
    "agora.syn25741025.overall_scores.v12.2024-10-24.json",
    simplifyVector = TRUE
  )
)
ago1Alzheimer’s disease genes (Agora nominated targets):
https://agora.adknowledgeportal.org/genes/nominated-targets
# Agora nominated-target gene list (CSV export).
ago2 <- read_csv(file = "agora.nominated-targets.gene-list.2024-10-24.csv")
ago2

# Keep only the scored genes whose HGNC symbol appears in the nominated list.
# NOTE(review): matching is by symbol, not by Ensembl ID — confirm that the
# symbols in both exports follow the same nomenclature release.
ago <- filter(ago1, hgnc_symbol %in% ago2[["Gene Symbol"]])
agoOpenTargets genes
Alzheimer’s disease gene prioritization scores from OpenTargets:
# OpenTargets associated-targets export for MONDO_0004975; cells containing
# "No data" are read as NA.
# NOTE(review): the Methods text cites data version v24_09 (retrieved on
# October 24th 2024), but the active line loads the v24_03 export dated
# 6_4_2024 — confirm which file the reported results are based on.
ot <- read_tsv(
  file = "OT-MONDO_0004975-associated-targets-6_4_2024-v24_03.tsv",
  na = "No data",
  show_col_types = FALSE
)
# ot <- read_tsv("OT-MONDO_0004975-associated-targets-10_24_2024-v24_09.tsv", show_col_types = FALSE, na = "No data")
ot
Add Ensembl Gene IDs (mapped from the OpenTargets gene symbols with g:Profiler's gconvert):
# Annotate the OpenTargets table with Ensembl gene IDs.
#
# gconvert() maps each symbol in ot$symbol to the ENSG namespace and reports
# the position of the originating query in `input_number`; that position is
# used as the key to join the original OpenTargets columns back on.
otcols <- colnames(ot)
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf, # keep every mapping returned for a symbol
  filter_na = TRUE  # drop symbols without an ENSG mapping
) %>%
  # Cast to character so the key has the same type as the row-name column
  # created by rownames_to_column() below.
  mutate(input_number = as.character(input_number)) %>%
  left_join(
    ot %>% rownames_to_column(var = "input_number"),
    by = "input_number"
  ) %>%
  # all_of() makes it explicit that `otcols` is an external character vector
  # of column names; passing the bare vector to select() is deprecated in
  # tidyselect and is ambiguous if a column were ever called "otcols".
  # NOTE(review): with mthreshold = Inf a symbol that maps to several Ensembl
  # IDs yields several rows here, which can fan out later joins on
  # ensembl_gene_id — confirm this is intended.
  select(ensembl_gene_id = target, all_of(otcols))
otensgCorrelation between Agora and OpenTargets scores
GVC genes
d.cor <-
gvc.genes %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id))d.cor %>% nrow()[1] 1345
d.cor %>% drop_na(genetics_score, otGeneticsPortal) %>% nrow()[1] 56
d.cor %>%
drop_na(genetics_score, otGeneticsPortal) %>%
summarize(cor = tidy(cor.test(genetics_score, otGeneticsPortal, method="kendall"))) %>%
unnest(cor)d.cor %>% nrow()[1] 1345
d.cor %>% drop_na(target_risk_score, globalScore) %>% nrow()[1] 75
d.cor %>%
drop_na(target_risk_score, globalScore) %>%
summarize(cor = tidy(cor.test(target_risk_score, globalScore, method="kendall"))) %>%
unnest(cor)All genes
d.cor <-
ago %>%
left_join(otensg, by = "ensembl_gene_id")d.cor %>% nrow()[1] 926
d.cor %>% drop_na(genetics_score, otGeneticsPortal) %>% nrow()[1] 75
d.cor %>%
drop_na(genetics_score, otGeneticsPortal) %>%
summarize(cor = tidy(cor.test(genetics_score, otGeneticsPortal, method="kendall"))) %>%
unnest(cor)d.cor %>% nrow()[1] 926
d.cor %>% drop_na(target_risk_score, globalScore) %>% nrow()[1] 484
d.cor %>%
drop_na(target_risk_score, globalScore) %>%
summarize(cor = tidy(cor.test(target_risk_score, globalScore, method="kendall"))) %>%
unnest(cor)Overlaps between GVC, Agora, and OpenTargets genes
# The three gene-ID sets compared in the Venn diagram and partition table.
x <- list(
  GVC = gvc.genes$gene_id,
  Agora = ago$ensembl_gene_id,
  OpenTargets = otensg$ensembl_gene_id
)

library(VennDiagram)

# With filename = NULL venn.diagram() returns the grid object rather than
# writing an image file, so it is drawn explicitly on a fresh page.
grid.newpage()
v <- venn.diagram(
  x,
  filename = NULL,
  fill = c("#FF0000", "#00FF00", "#0000FF")
)
grid.draw(v)

# Table of Venn regions; the ..set.. and ..values.. columns are used by the
# overlap analyses that follow.
p <- get.venn.partitions(x)
pORA of genes in overlaps
GVC ∩ Agora ∩ OpenTargets
genes <- p %>%
filter(..set.. == "GVC∩Agora∩OpenTargets") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)# save overlap gene ids for later
overlap_gene_ids <- queryGVC ∩ Agora
genes <- p %>%
filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩Agora)∖(OpenTargets)")) %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC ∩ OpenTargets
genes <- p %>%
filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩OpenTargets)∖(Agora)")) %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)Agora ∩ OpenTargets
genes <- p %>%
filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(Agora∩OpenTargets)∖(GVC)")) %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)(GVC ∩ Agora) ∪ (GVC ∩ OpenTargets) ∪ (Agora ∩ OpenTargets)
genes <- p %>%
filter(..set.. %in% c("GVC∩Agora∩OpenTargets", "(GVC∩Agora)∖(OpenTargets)", "(GVC∩OpenTargets)∖(Agora)", "(Agora∩OpenTargets)∖(GVC)")) %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)(Agora ∩ OpenTargets) ∖ (GVC)
genes <- p %>%
filter(..set.. == "(Agora∩OpenTargets)∖(GVC)") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)(GVC ∩ OpenTargets) ∖ (Agora)
genes <- p %>%
filter(..set.. == "(GVC∩OpenTargets)∖(Agora)") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)(OpenTargets) ∖ (GVC ∪ Agora)
genes <- p %>%
filter(..set.. == "(OpenTargets)∖(GVC∪Agora)") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)(GVC ∩ Agora) ∖ (OpenTargets)
genes <- p %>%
filter(..set.. == "(GVC∩Agora)∖(OpenTargets)") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)(Agora) ∖ (GVC ∪ OpenTargets)
genes <- p %>%
filter(..set.. == "(Agora)∖(GVC∪OpenTargets)") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)(GVC) ∖ (Agora ∪ OpenTargets)
genes <- p %>%
filter(..set.. == "(GVC)∖(Agora∪OpenTargets)") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
select(gene_id, symbol, genetics_score, otGeneticsPortal, globalScore, target_risk_score, multi_omics_score) %>%
arrange(desc(genetics_score), desc(otGeneticsPortal), desc(target_risk_score), desc(globalScore))
genesquery <- genes %>% distinct(gene_id) %>% pull(gene_id)
# Genes in the (GVC)∖(Agora∪OpenTargets) partition have no match in `ago` or
# `otensg`, so every column used by arrange() above is NA and the row order is
# only the random shuffle from sample_frac(). Run the query as unordered so the
# enrichment result does not depend on that arbitrary order.
gostres <- gost(
  query = query,
  organism = "hsapiens",
  domain_scope = "annotated",
  exclude_iea = TRUE,
  ordered_query = FALSE, # <- UNORDERED QUERY!
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)ORA of GVC genes
GVC genes (within 1Mb flanking regions of GVC loci)
unordered query
d0 <- gvc.genes %>% select(gene_id, gene_symbol)
d0query <- d0 %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = FALSE, # <- UNORDERED QUERY!
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes
unordered query
d0.minus_apoe_hla <- gvc.genes.minus_apoe_hla %>% select(gene_id, gene_symbol)
d0.minus_apoe_hlaquery <- d0.minus_apoe_hla %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = FALSE, # <- UNORDERED QUERY!
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC genes (within 200Kb flanking regions of GVC loci) minus APOE and HLA loci genes
unordered query
d0.minus_apoe_hla.200kb <- gvc.genes.minus_apoe_hla %>% filter(absolute_distance <= 200000) %>% select(gene_id, gene_symbol)
d0.minus_apoe_hla.200kbquery <- d0.minus_apoe_hla.200kb %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = FALSE, # <- UNORDERED QUERY!
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC genes (within 20Kb flanking regions of GVC loci) minus APOE and HLA loci genes
unordered query
d0.minus_apoe_hla.20kb <- gvc.genes.minus_apoe_hla %>% filter(absolute_distance <= 20000) %>% select(gene_id, gene_symbol)
d0.minus_apoe_hla.20kbquery <- d0.minus_apoe_hla.20kb %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = FALSE, # <- UNORDERED QUERY!
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes, ordered by absolute distance from GVC loci
query ordered by absolute distance
d0.minus_apoe_hla <- gvc.genes.minus_apoe_hla %>% arrange(absolute_distance) %>% select(gene_id, gene_symbol)
d0.minus_apoe_hlaquery <- d0.minus_apoe_hla %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE, # <- ORDERED QUERY!
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)Perform alternative ORA of GO:CP using GOrilla <2024-10-24>:
d0.minus_apoe_hla %>% distinct(gene_id) %>% pull(gene_id) %>% write("query.txt")
read_tsv("gvc.genes.minus_apoe_hla.gorilla.tsv") %>% select(Description, everything())GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes sorted by Agora’s genetics_score
ago1 is used instead of ago to annotate a larger proportion of GVC genes with Agora scores
d1 <- gvc.genes.minus_apoe_hla %>%
left_join(ago1, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(genetics_score)) %>%
select(-c(symbol, hgnc_symbol)) %>%
select(gene_id, gene_symbol, genetics_score, otGeneticsPortal, everything())
d1query <- d1 %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes sorted by OpenTargets’ otGeneticsPortal
ago1 is used instead of ago to annotate a larger proportion of GVC genes with Agora scores
d2 <- gvc.genes.minus_apoe_hla %>%
left_join(ago1, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(otGeneticsPortal)) %>%
select(-c(symbol, hgnc_symbol)) %>%
select(gene_id, gene_symbol, otGeneticsPortal, genetics_score, everything())
d2query <- d2 %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes sorted by Agora’s target_risk_score
ago1 is used instead of ago to annotate a larger proportion of GVC genes with Agora scores
d3 <- gvc.genes.minus_apoe_hla %>%
left_join(ago1, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(target_risk_score)) %>%
select(-c(symbol, hgnc_symbol)) %>%
select(gene_id, gene_symbol, target_risk_score, globalScore, everything())
d3query <- d3 %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes sorted by OpenTargets’ globalScore
ago1 is used instead of ago to annotate a larger proportion of GVC genes with Agora scores
d4 <- gvc.genes.minus_apoe_hla %>%
left_join(ago1, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(globalScore)) %>%
select(-c(symbol, hgnc_symbol)) %>%
select(gene_id, gene_symbol, globalScore, target_risk_score, everything())
d4query <- d4 %>% distinct(gene_id) %>% pull(gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)ORA of Agora genes
Agora genes sorted by genetics_score
d5 <- ago %>%
drop_na(genetics_score) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(genetics_score))
d5query <- d5 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)Agora genes sorted by multi_omics_score
d6 <- ago %>%
drop_na(multi_omics_score) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(multi_omics_score))
d6query <- d6 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)Agora genes sorted by target_risk_score
d7 <- ago %>%
drop_na(target_risk_score) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(target_risk_score))
d7query <- d7 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)ORA of OpenTargets genes
OpenTargets genes sorted by otGeneticsPortal
d8 <- otensg %>%
drop_na(otGeneticsPortal) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(otGeneticsPortal))
d8query <- d8 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)OpenTargets genes sorted by globalScore
d9 <- otensg %>%
drop_na(globalScore) %>%
sample_frac(1L) %>% # randomize row order before arranging
arrange(desc(globalScore))
d9query <- d9 %>% distinct(ensembl_gene_id) %>% pull(ensembl_gene_id)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = TRUE,
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)ORA of GVC genes in table from manuscript
d10 <- read_xlsx("2024-08-29_GVC Table 1C - WORKING COPY.xlsx", sheet = "PG Gene List", skip = 1)query <-
d10 %>%
rename(gene = `GVC expanded list of possible genes (500kb)`) %>%
bind_rows(tibble(gene = "APOE")) %>%
distinct(gene) %>%
pull(gene)
gostres <- gost(
query = query,
organism = "hsapiens",
domain_scope = "annotated",
exclude_iea = TRUE,
ordered_query = FALSE, # <- UNORDERED QUERY!
significant = TRUE,
user_threshold = 0.005,
correction_method = "gSCS")
gostres$result %>% select(term_name, term_id, source, everything())gostplot(gostres, capped = FALSE, interactive = TRUE)Check missing OpenTargets scores in table from manuscript
t <- read_xlsx("8-23-2024 - GVC Table 1C - WORKING COPYL_MRC.xlsx", skip = 1, na = "No data") %>% janitor::clean_names() %>% select(gvc_expanded_list_of_possible_genes_500kb, open_target_scores_global, open_target_scores_genetics) %>% rename(symbol = gvc_expanded_list_of_possible_genes_500kb)t %>%
left_join(ot, by = "symbol") %>%
filter(round(open_target_scores_global, 4) != round(globalScore, 4)) %>%
select(symbol, open_target_scores_global, globalScore)t %>%
left_join(ot, by = "symbol") %>%
filter(round(open_target_scores_genetics, 4) != round(otGeneticsPortal, 4)) %>%
select(symbol, open_target_scores_genetics, otGeneticsPortal)GVC loci annotated with genes in overlaps
GVC ∩ Agora ∩ OpenTargets
gene_ids <- p %>%
filter(..set.. == "GVC∩Agora∩OpenTargets") %>%
unnest(..values..) %>%
select(gene_id = ..values..) %>%
left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
distinct(gene_id) %>%
pull(gene_id)
length(gene_ids)[1] 75
gvc %>%
filter(gene_id %in% gene_ids) %>%
select(gvc_locus = grouped_loci_gvc, gene_id, gene_symbol) %>%
arrange(gene_symbol) %>%
mutate(gene = gene_symbol) %>%
# unite(gene, gene_id, gene_symbol, sep = ":", remove = FALSE) %>%
distinct(gvc_locus, gene, .keep_all = TRUE) %>%
group_by(gvc_locus) %>%
summarize(genes = str_c(gene, collapse = " | ")) %>%
select(gvc_locus, genes) %>%
gt::gt()| gvc_locus | genes |
|---|---|
| ABCA7 | ABCA7 | NDUFS7 |
| ABI3 / ACE | NGFR | ZNF652 |
| ACE | ACE |
| ADAM10 / MINDY2 | ADAM10 | ALDH1A2 | LIPC |
| ADAMTS4 | ADAMTS4 | FCER1G | NDUFS2 |
| ANK3 / CCDC6 | CCDC6 | SLC16A9 |
| ANKRD31 | ANKRD31 | ENC1 |
| APH1B | LACTB |
| APOE / TOMM40 | APOC1 | APOE | BCAM | MARK4 | NECTIN2 |
| APP | MRPL39 |
| APP / ADAMTS1 | ADAMTS1 |
| BCKDK / KAT8 / VKORC1 | BCKDK | STX4 | VKORC1 |
| BIN1 | BIN1 |
| CASS4 | CASS4 |
| CD2AP | CD2AP |
| CD33 | CD33 |
| CHRNE | ENO3 | RABEP1 | SLC25A11 | ZFP3 |
| CLU / PTK2B | CLU | EPHX2 | PTK2B | SCARA3 |
| CR1 | CR1 |
| CTSH | CTSH |
| DOC2A | DOC2A |
| ECHDC3 / USP6NL | USP6NL |
| EED / PICALM | DLG2 | PICALM |
| EPHA1 / EPHA1-AS1 | EPHA1 |
| HAVCR2 | CYFIP2 | HAVCR2 |
| HLA | HLA-DRA | HLA-DRB1 |
| ICA1 | NXPH1 |
| IDUA | CPLX1 |
| IL34 | MTSS2 |
| INPP5D | INPP5D |
| LILRB2 / TMC4 | LAIR1 |
| MADD / SPI1 | C1QTNF4 | NDUFS3 | NR1H3 | RAPSN | SPI1 |
| MS4A / MS4A2 / MS4A4A / MS4A6A | MRPL16 | MS4A2 | MS4A4A | MS4A6A |
| NDUFAF7 / PRKD3 | QPCT |
| NYAP1 / PILRA / SPDYE3 / ZCWPW1 | NYAP1 |
| OARD1 / TREM2 / TREML2 / UNC5CL | TREM2 |
| PLCG2 | PLCG2 | SDR42E1 |
| PLEKHA1 | HTRA1 |
| RABEP1 / SCIMP | ENO3 | RABEP1 | SLC25A11 | ZFP3 |
| RASGEF1C | MAPK9 |
| RIN3 / SLC24A4 | RIN3 | SLC24A4 |
| SHARPIN | PLEC |
| SIGLEC11 | NR1H2 |
| WNT3 | NSF |
Table of GVC genes sorted by combined rank (GVC absolute distance, Agora and OpenTargets scores)
GVC genes (within 1Mb flanking regions of GVC loci) minus APOE and HLA loci genes sorted by GVC’s absolute_distance, Agora’s target_risk_score and OpenTargets’ globalScore.
Genes are sorted based on a weighted average rank with the following weights:
- 0.50 for GVC’s absolute_distance
- 0.25 for Agora’s target_risk_score
- 0.25 for OpenTargets’ globalScore
ago1 is used instead of ago to annotate a larger proportion of GVC genes with Agora scores
# Rank GVC genes by a weighted combination of three per-source ranks:
#   gvc_rank          ascending absolute_distance (closest gene = rank 1)
#   agora_rank        descending target_risk_score
#   opentargets_rank  descending globalScore
# rank() keeps NA by default, so a gene lacking an Agora or OpenTargets score
# gets an NA combined_rank and is placed last by arrange().
weights <- c(0.50, 0.25, 0.25) # weights for gvc, agora, open_targets ranks
d <- gvc.genes.minus_apoe_hla %>%
  left_join(ago1, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>% # randomize row order before ranking
  mutate(
    gvc_rank = rank(absolute_distance, ties.method = "min"),
    agora_rank = rank(-target_risk_score, ties.method = "min"),
    opentargets_rank = rank(-globalScore, ties.method = "min")
  ) %>%
  # Apply each weight to its own rank column. Multiplying the three-column
  # data frame by `weights` directly recycles the vector down the rows
  # (column-major), which weights genes by row position instead of weighting
  # the rank columns.
  mutate(
    combined_rank =
      weights[1] * gvc_rank +
      weights[2] * agora_rank +
      weights[3] * opentargets_rank
  ) %>%
  # Convert the weighted score into a rank (ties share the lowest rank).
  mutate(combined_rank = rank(combined_rank, ties.method = "min")) %>%
  arrange(combined_rank) %>%
  select(
    gene_id,
    gene_symbol,
    combined_rank,
    gvc_rank,
    agora_rank,
    opentargets_rank,
    absolute_distance,
    target_risk_score,
    genetics_score,
    multi_omics_score,
    globalScore,
    otGeneticsPortal
  )
dDownload the table above as an Excel spreadsheet here.
# List the rows of gvc_1361.txt whose GVC symbol is absent from gvc.genes.
# `%notin%` does not appear to be defined by base R 4.4 or by any package
# attached in this document, so the membership test is negated explicitly.
read_tsv("gvc_1361.txt") %>%
  filter(!(GVC %in% gvc.genes$gene_symbol)) %>%
  arrange(GVC)
Methods for manuscript
Phase 2. Pathway analysis of GVC, Agora and OpenTargets candidate AD genes. We conducted gene set over-representation analysis (ORA) of GVC, Agora and OpenTargets candidate AD gene lists using R[37] with the gprofiler2 package (see https://github.com/marcoralab/gvc_agora_opentargets), excluding electronic Gene Ontology (GO) annotations, and filtering results using a p-value significance threshold of 0.005 after multiple testing correction with the g:SCS algorithm.
We used the GVC gene list of 1,344 genes in the proximity of AD risk loci that we built as described above. We retrieved Agora’s gene list of Alzheimer’s disease nominated targets (site version 3.4.0; data version syn13363290-v68) from https://agora.adknowledgeportal.org/genes/nominated-targets and Agora’s gene scores (data version syn25741025-v12) from https://www.synapse.org/Synapse:syn25741025 on October 24th, 2024. We retrieved OpenTargets’ gene list of Alzheimer’s disease (EFO:MONDO_0004975) associated targets (data version v24_09) from https://platform.opentargets.org/disease/MONDO_0004975/associations on October 24th, 2024.
We conducted ORA using the following candidate AD gene lists: 1) Agora’s gene list of Alzheimer’s disease nominated targets, ordered by decreasing Agora genetics, multi-omics, or target risk score; 2) OpenTargets’ gene list of Alzheimer’s disease associated targets, ordered by decreasing OpenTargets genetics portal or global score; 3) the GVC gene list, ordered by decreasing values of the aforementioned Agora or OpenTargets scores; 4) the gene lists corresponding to the regions of a Venn diagram of gene lists 1-3, built using R[37] with the VennDiagram package (see https://github.com/marcoralab/gvc_agora_opentargets), ordered by decreasing values of the aforementioned Agora or OpenTargets scores.
Print environment
sessioninfo::session_info()─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.4.2 (2024-10-31)
os macOS Sonoma 14.7
system aarch64, darwin20
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/New_York
date 2024-12-05
pandoc 3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
backports 1.5.0 2024-05-23 [1] CRAN (R 4.4.0)
bit 4.5.0 2024-09-20 [1] CRAN (R 4.4.1)
bit64 4.5.2 2024-09-22 [1] CRAN (R 4.4.1)
bitops 1.0-9 2024-10-03 [1] CRAN (R 4.4.1)
broom * 1.0.7 2024-09-26 [1] CRAN (R 4.4.1)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.4.0)
cli 3.6.3 2024-06-21 [1] CRAN (R 4.4.0)
colorspace 2.1-1 2024-07-26 [1] CRAN (R 4.4.0)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.4.0)
crosstalk 1.2.1 2023-11-23 [1] CRAN (R 4.4.0)
data.table 1.16.2 2024-10-10 [1] CRAN (R 4.4.1)
digest 0.6.37 2024-08-19 [1] CRAN (R 4.4.1)
dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.4.0)
evaluate 1.0.1 2024-10-10 [1] CRAN (R 4.4.1)
fansi 1.0.6 2023-12-08 [1] CRAN (R 4.4.0)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.4.0)
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.4.0)
formatR 1.14 2023-01-17 [1] CRAN (R 4.4.0)
futile.logger * 1.4.3 2016-07-10 [1] CRAN (R 4.4.0)
futile.options 1.0.1 2018-04-20 [1] CRAN (R 4.4.0)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.4.0)
ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.4.0)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.4.1)
gprofiler2 * 0.2.3 2024-02-23 [1] CRAN (R 4.4.0)
gt 0.11.1 2024-10-04 [1] CRAN (R 4.4.1)
gtable 0.3.6 2024-10-25 [1] CRAN (R 4.4.1)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.4.0)
htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.4.0)
httpuv 1.6.15 2024-03-26 [1] CRAN (R 4.4.0)
httr 1.4.7 2023-08-15 [1] CRAN (R 4.4.0)
janitor * 2.2.0 2023-02-02 [1] CRAN (R 4.4.0)
jsonlite * 1.8.9 2024-09-20 [1] CRAN (R 4.4.1)
knitr 1.49 2024-11-08 [1] CRAN (R 4.4.1)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.4.0)
lambda.r 1.2.4 2019-09-18 [1] CRAN (R 4.4.0)
later 1.3.2 2023-12-06 [1] CRAN (R 4.4.0)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.4.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.4.0)
lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.4.0)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.4.0)
mime 0.12 2021-09-28 [1] CRAN (R 4.4.0)
munsell 0.5.1 2024-04-01 [1] CRAN (R 4.4.0)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.4.0)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.4.0)
plotly 4.10.4 2024-01-13 [1] CRAN (R 4.4.0)
promises 1.3.0 2024-04-05 [1] CRAN (R 4.4.0)
purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.4.0)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.4.0)
Rcpp 1.0.13-1 2024-11-02 [1] CRAN (R 4.4.1)
RCurl 1.98-1.16 2024-07-11 [1] CRAN (R 4.4.0)
readr * 2.1.5 2024-01-10 [1] CRAN (R 4.4.0)
readxl * 1.4.3 2023-07-06 [1] CRAN (R 4.4.0)
rlang 1.1.4 2024-06-04 [1] CRAN (R 4.4.0)
rmarkdown 2.29 2024-11-04 [1] CRAN (R 4.4.1)
rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.4.1)
sass 0.4.9 2024-03-15 [1] CRAN (R 4.4.0)
scales 1.3.0 2023-11-28 [1] CRAN (R 4.4.0)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.4.0)
shiny 1.9.1 2024-08-01 [1] CRAN (R 4.4.0)
snakecase 0.11.1 2023-08-27 [1] CRAN (R 4.4.0)
stringi 1.8.4 2024-05-06 [1] CRAN (R 4.4.0)
stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.4.0)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.4.0)
tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.4.0)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.4.0)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.4.0)
timechange 0.3.0 2024-01-18 [1] CRAN (R 4.4.0)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.4.0)
utf8 1.2.4 2023-10-22 [1] CRAN (R 4.4.0)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.4.0)
VennDiagram * 1.7.3 2022-04-12 [1] CRAN (R 4.4.0)
viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.4.0)
vroom 1.6.5 2023-12-05 [1] CRAN (R 4.4.0)
withr 3.0.2 2024-10-28 [1] CRAN (R 4.4.1)
writexl 1.5.1 2024-10-04 [1] CRAN (R 4.4.1)
xfun 0.49 2024-10-31 [1] CRAN (R 4.4.1)
xml2 1.3.6 2023-12-04 [1] CRAN (R 4.4.0)
xtable 1.8-4 2019-04-21 [1] CRAN (R 4.4.0)
yaml 2.3.10 2024-07-26 [1] CRAN (R 4.4.0)
[1] /Users/marcoe02/.Rlib
[2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
──────────────────────────────────────────────────────────────────────────────